############################################################################################################
############################################################################################################
################################################ START HERE ################################################
############################################################################################################
############################################################################################################
# ----------------------------
# Setting up environment
# ---------------------------
# Clean environment
rm(list = ls(all.names = TRUE)) # will clear all objects including hidden objects
gc() # free up memory and report the memory usage
options(max.print = .Machine$integer.max, scipen = 999, stringsAsFactors = F, dplyr.summarise.inform = F) # avoid truncated output in R console and scientific notation
# Loading relevant libraries 
library(tidyverse) # includes ggplot2, for data visualisation. dplyr, for data manipulation.
# Set input path
path <- "INPUT_DIRECTORY"
setwd(path)
list.files(path)
set.seed(42)

# ----------------------------
# 1. Load Required Libraries
# ----------------------------
library(ggalluvial)
library(AnnotationDbi)
library(biomaRt)
library(celldex)
library(cluster)
library(DoubletFinder)
library(dplyr)
library(EnhancedVolcano)
library(glmGamPoi)
library(ggplot2)
library(ggpubr)
library(ggridges)
library(gridExtra)
library(MAST)
library(Matrix)
library(patchwork)
library(pheatmap)
library(ProjecTILs)
library(RColorBrewer)
library(reticulate)
library(scales)
library(sctransform)
library(Seurat)
library(SeuratObject)
library(SeuratData)
library(viridis)

# ----------------------------
# 2. Map mitochondrial genes - GeneID to Ensmbl
# ----------------------------
# Connect to Ensembl (version 112, for GENCODE M35)
ensembl <- useEnsembl(biomart = "genes", dataset = "mmusculus_gene_ensembl", version = 113)
# Retrieve mitochondrial genes (Chromosome MT)
mito_gene_info <- getBM(
  attributes = c("ensembl_gene_id_version", "external_gene_name", "chromosome_name"),
  filters = "chromosome_name",
  values = "MT",
  mart = ensembl
)
# Extract only Ensembl gene IDs for mitochondrial genes
mito_genes_ensembl <- unique(mito_gene_info$ensembl_gene_id_version)

# Retrieve ribosomal genes by filtering for gene names containing "Rps" or "Rpl"
ribosomal_gene_info <- getBM(
  attributes = c("ensembl_gene_id_version", "external_gene_name", "chromosome_name"),
  filters = "external_gene_name",
  values = c(paste0("Rps", 1:40), paste0("Rpl", 1:40)),  # Covering common ribosomal gene names
  mart = ensembl
)
# Extract only Ensembl gene IDs for ribosomal genes
ribosomal_genes_ensembl <- unique(ribosomal_gene_info$ensembl_gene_id_version)

# Display the first few results
cat("Mitochondrial Genes:\n")
head(mito_genes_ensembl)
cat("\nRibosomal Genes:\n")
head(ribosomal_genes_ensembl)

# ----------------------------
# 3. Load Files, Create Seurat Objects, and Save Raw Files with UMAP comparisons
# ----------------------------
# Define sample metadata
sample_info <- data.frame(
  sample = c("RGc8", "RGc9", "RGc10", "RGc11", "RGc12", "RGc13"),
  group  = c("FloxFlox", "FloxFlox", "FloxFlox", "CreFlox", "CreFlox", "CreFlox"),
  stringsAsFactors = FALSE
)

# Create folder for saving objects and plots if it doesn't exist
raw_folder <- "RDS"
if (!dir.exists(raw_folder)) {
  dir.create(raw_folder, recursive = TRUE)
}

# Loop through each sample
for (i in 1:nrow(sample_info)) {
  sample_name <- sample_info$sample[i]
  condition <- sample_info$group[i]
  
  # File paths
  matrix_file <- paste0(sample_name, "_quants_mat.mtx")
  barcodes_file <- paste0(sample_name, "_quants_mat_rows.txt")  # Cell names
  genes_file <- paste0(sample_name, "_quants_mat_cols.txt")       # Gene IDs
  
  # Load the sparse matrix
  expression_matrix <- readMM(matrix_file)
  
  # Read barcodes and gene names
  barcodes <- readLines(barcodes_file)
  genes <- readLines(genes_file)
  
  # Assign row and column names
  rownames(expression_matrix) <- barcodes
  colnames(expression_matrix) <- genes
  
  # Convert dgTMatrix to dgCMatrix
  expression_matrix <- as(expression_matrix, "CsparseMatrix")
  
  # Create Seurat object (transpose so genes are rows)
  seurat_obj <- CreateSeuratObject(counts = t(expression_matrix), 
                                   project = sample_name, 
                                   min.cells = 3, 
                                   min.features = 200)
  
  # Add metadata (adjust as needed)
  seurat_obj$sample <- paste0(sample_name, "_", condition, "_", barcodes)
  seurat_obj@meta.data <- separate(seurat_obj@meta.data, col = 'sample', 
                                   into = c('Sample', 'Condition', 'Barcode'), sep = '_')
  
  # Dynamically assign Seurat object with a sample-specific name in the Global Environment
  assign(paste0(sample_name, "_", condition), seurat_obj, envir = .GlobalEnv)
  
  # Save the raw Seurat object to disk
  saveRDS(seurat_obj, file = paste0(raw_folder, "/", sample_name, "_raw.RDS"))
  
  # -------------------------------------------------
  # UMAP BEFORE filtering for Ptprc expression
  # -------------------------------------------------
  seurat_obj <- NormalizeData(seurat_obj)
  seurat_obj <- FindVariableFeatures(seurat_obj)
  seurat_obj <- ScaleData(seurat_obj)
  seurat_obj <- RunPCA(seurat_obj, verbose = FALSE)
  seurat_obj <- RunUMAP(seurat_obj, dims = 1:10, verbose = FALSE)
  
  umap_before <- DimPlot(seurat_obj, reduction = "umap", group.by = "Condition", label = TRUE) +
    ggtitle(paste(sample_name, "UMAP Before Filtering"))
  
  # -------------------------------------------------
  # Subset cells with Ptprc expression > 0 (CD45 positive)
  # -------------------------------------------------
  seurat_obj_cd45 <- subset(seurat_obj, cells = colnames(seurat_obj)[GetAssayData(seurat_obj, slot = "counts")["ENSMUSG00000026395.17", ] > 0])
  
  # Re-run the workflow on the filtered object
  seurat_obj_cd45 <- NormalizeData(seurat_obj_cd45)
  seurat_obj_cd45 <- FindVariableFeatures(seurat_obj_cd45)
  seurat_obj_cd45 <- ScaleData(seurat_obj_cd45)
  seurat_obj_cd45 <- RunPCA(seurat_obj_cd45, verbose = FALSE)
  seurat_obj_cd45 <- RunUMAP(seurat_obj_cd45, dims = 1:10, verbose = FALSE)
  
  umap_after <- DimPlot(seurat_obj_cd45, reduction = "umap", group.by = "Condition", label = TRUE) +
    ggtitle(paste(sample_name, "UMAP After Filtering"))
  
  # -------------------------------------------------
  # Combine and save the UMAP plots side by side
  # -------------------------------------------------
  combined_umap <- umap_before + umap_after
  ggsave(filename = paste0(raw_folder, "/", sample_name, "_UMAP_comparison.png"), 
         plot = combined_umap, width = 10, height = 5)
  
  # Optionally, save the filtered Seurat object
  saveRDS(seurat_obj_cd45, file = paste0(raw_folder, "/", sample_name, "_cd45.RDS"))
}

# ----------------------------
# 4. Clean objects, filter seurat objects and save filtered files
# ----------------------------
# After processing all samples, remove the dynamically assigned Seurat objects
sample_obj_names <- sapply(1:nrow(sample_info), function(i) {
  paste0(sample_info$sample[i], "_", sample_info$group[i])
})
rm(list = sample_obj_names)
gc()

# Now you can load the raw RDS files
dir_list <- list.files("RDS", pattern = "_cd45\\.RDS$")
# Remove the trailing "_cd45.RDS" to extract sample names
dir_list <- sub("_cd45\\.RDS$", "", dir_list)

# Expected nExp (DoubletFinder) for each sample - According to https://github.com/chris-mcginnis-ucsf/DoubletFinder/issues/76
nExp_values <- data.frame(
  sample = c("RGc8", "RGc9", "RGc10", "RGc11", "RGc12", "RGc13"),
  nExp_value = c(0.006, 0.023, 0.017, 0.017, 0.004, 0.006)
)

for (sample in dir_list) {
  # Read the raw Seurat object for the current sample
  raw_data <- readRDS(file = paste0("RDS/", sample, "_cd45.RDS"))
  seurat_obj <- raw_data
  
  ### RNA Preprocessing: Normalize, Find Variable Features, Scale
  seurat_obj <- NormalizeData(object = seurat_obj, normalization.method = "LogNormalize", scale.factor = 10000)
  seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "vst", nfeatures = 3000)
  seurat_obj <- ScaleData(object = seurat_obj)
  seurat_obj <- RunPCA(seurat_obj, verbose = FALSE)
  seurat_obj <- FindNeighbors(seurat_obj, dims = 1:20)
  seurat_obj <- FindClusters(seurat_obj)
  seurat_obj <- RunUMAP(seurat_obj, dims = 1:20, verbose = FALSE)

  ### Doublet Prediction using DoubletFinder (no ground-truth)
  sweep.res <- paramSweep(seurat_obj, PCs = 1:20)
  sweep.stats <- summarizeSweep(sweep.res, GT = FALSE)
  bcmvn <- find.pK(sweep.stats)
  # Choose optimal pK based on highest BCmetric - You need to choose the pK value that gives the highest BCmetric value
  pK <- bcmvn %>%
    filter(BCmetric == max(BCmetric)) %>%
    dplyr::select(pK)
  pK <- as.numeric(as.character(pK[[1]]))
  # Homotypic Doublet Proportion Estimate
  annotations <- seurat_obj@meta.data$seurat_clusters
  homotypic.prop <- modelHomotypic(annotations)
  # Retrieve the specific nExp_value for the current sample
  nExp_value <- nExp_values$nExp_value[nExp_values$sample == sample]
  if (length(nExp_value) == 0) {
    stop(paste("No nExp_value found for sample:", sample))
  }
  
  nExp_poi <- round(nExp_value * nrow(seurat_obj@meta.data))
  nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))
  
  seurat_obj <- doubletFinder(seurat_obj, pN = 0.25, pK = pK, nExp = nExp_poi.adj, PCs = 1:20)
  # Identify the column with the doublet classification
  DF.name <- colnames(seurat_obj@meta.data)[grepl("DF.classification", colnames(seurat_obj@meta.data))]
  
  print(
    DimPlot(seurat_obj, group.by = DF.name, label = TRUE) +
      ggtitle(paste0("Singlet vs. Doublet Classification - ", sample)) +
      theme_minimal()
  )
  
  ### QC Plots for Doublet Predictions
  qc_folder <- "QC"
  if (!dir.exists(qc_folder)) {
    dir.create(qc_folder, recursive = TRUE)
  }
  pdf(file = paste0(qc_folder, "/", sample, "_QCDoublets.pdf"), width = 23.4, height = 16.5)
  plot_a <- cowplot::plot_grid(ncol = 2,
                               VlnPlot(seurat_obj, features = "nFeature_RNA", group.by = DF.name, pt.size = 0.1),
                               DimPlot(seurat_obj, group.by = DF.name) + NoAxes())
  print(plot_a)
  print(DimPlot(seurat_obj, group.by = DF.name))
  print(VlnPlot(seurat_obj, features = "nFeature_RNA", group.by = DF.name, pt.size = 0.1))
  dev.off()
  
  ### Remove Predicted Doublets
  seurat_obj <- seurat_obj[, seurat_obj@meta.data[, DF.name] == "Singlet"]
  cat("Dimensions after doublet removal for", sample, ":\n")
  print(dim(seurat_obj))
  
  ### Quality Control: Add Mitochondrial and Ribosomal Percentages
  # Use the Ensembl gene lists from above
  unique_mito_genes <- intersect(rownames(seurat_obj), unique(mito_genes_ensembl, rownames(seurat_obj), value = TRUE))
  seurat_obj$percent.mt <- PercentageFeatureSet(seurat_obj, features = unique_mito_genes)
  
  unique_ribo_genes <- intersect(rownames(seurat_obj), unique(ribosomal_genes_ensembl, rownames(seurat_obj), value = TRUE))
  seurat_obj$percent.ribo <- PercentageFeatureSet(seurat_obj, features = ribosomal_genes_ensembl)
  
  feats <- c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.ribo")
  
  ### Filter Out Cells Based on QC Metrics
  # Remove cells with high mitochondrial percentage (<10% mito) and low ribosomal content (>10% ribo)
  selected_mito <- WhichCells(seurat_obj, expression = percent.mt < 5)
  selected_ribo <- WhichCells(seurat_obj, expression = percent.ribo > 5)
  seurat_obj <- subset(seurat_obj, cells = intersect(selected_mito, selected_ribo))
  
  # Further filter: only consider cells with at least 200 genes and ≥200 counts
  selected_cells <- WhichCells(seurat_obj, expression = nFeature_RNA > 200 & nCount_RNA >= 200)
  Sobject <- subset(seurat_obj, cells = selected_cells)
  
  cat("Dimensions after QC filtering for", sample, ":\n")
  print(dim(Sobject))
  
  # Optionally, remove temporary variables to clean up the workspace
  rm(selected_mito, selected_ribo, sweep.res, sweep.stats, bcmvn, DF.name, feats, nExp, pK)
  
  ### SCT Preprocessing: Normalize, Find Variable Features, Scale, PCA
  Sobject <- SCTransform(Sobject, verbose = FALSE, vars.to.regress = "percent.mt") # Substitutes NormalizeData, FindVariableFeatures and ScaleData
  Sobject <- RunPCA(Sobject, verbose = FALSE)
  
  ### Clustering
  Sobject <- Sobject %>%
    FindNeighbors(reduction = "pca", dims = 1:20) %>%
    FindClusters(resolution = seq(0.1, 1, 0.1), algorithm = 4, method="igraph")
  
  ### RunUMAP
  Sobject <- RunUMAP(Sobject, dims = 1:20, verbose = FALSE)
  
  ### Save Final Filtered and Clustered Object
  if (!dir.exists("RDS/Filtered")) {
    dir.create("RDS/Filtered", recursive = TRUE)
  }
  saveRDS(Sobject, file = paste0("RDS/Filtered/", sample, "_filtered.RDS"))
  
  # Dynamically assign the final object with name seurat_obj_<sample> in the global environment
  assign(paste0("seurat_obj_", sample), Sobject, envir = .GlobalEnv)
  
  # Remove intermediate objects
  rm(seurat_obj, Sobject)
}

# Generate individual plots
p4.1 <- DimPlot(seurat_obj_RGc8, reduction = 'umap')
p4.2 <- DimPlot(seurat_obj_RGc9, reduction = 'umap')
p4.3 <- DimPlot(seurat_obj_RGc10, reduction = 'umap')
p4.4 <- DimPlot(seurat_obj_RGc11, reduction = 'umap')
p4.5 <- DimPlot(seurat_obj_RGc12, reduction = 'umap')
p4.6 <- DimPlot(seurat_obj_RGc13, reduction = 'umap')

grid.arrange(p4.1,p4.2,p4.3,p4.4,p4.5,p4.6,ncol = 2, nrow = 3)

##########################################################################
##########################################################################
# Define parameters for the QC plot (adjust as needed)
param <- list(
  assay_raw = "SCT",      # Change if your assay is named differently
  col = "red",           # Color for high values in FeaturePlot gradient
  col_clusters = "Paired"   # Color palette for clusters in the violin plot
)

# Define the samples (adjust these names if needed)
samples <- c("RGc8", "RGc9", "RGc10", "RGc11", "RGc12", "RGc13")

for (sample in samples) {
  
  # Retrieve the seurat object for the current sample
  seurat_obj <- get(paste0("seurat_obj_", sample))
  
  ## --- nCount plots ---
  
  # Define QC feature for nCount
  qc_counts <- paste0("nCount_", param$assay_raw)
  
  # FeaturePlot for nCount
  p1_qc_counts <- suppressMessages(
    Seurat::FeaturePlot(seurat_obj, features = qc_counts) +
      ggtitle(paste0("Feature plot ",sample)) +
      scale_colour_gradient(low = "lightgrey", high = param$col, trans = "log10") +
      theme(plot.title = element_text(hjust = 0.5))
  )
  # Optionally label clusters (ensure LabelClusters is defined)
  p1_qc_counts <- LabelClusters(p1_qc_counts, id = "ident", box = FALSE)
  
  # Violin plot for nCount
  p2_qc_counts <- ggplot(seurat_obj[[]], aes_string(x = "seurat_clusters", y = qc_counts, 
                                                    fill = "seurat_clusters", group = "seurat_clusters")) +
    geom_violin(scale = "width") +
    ggtitle(paste0("Violin plot (log10 scale) ", sample)) +
    xlab("Cluster") +
    theme(legend.position = "none") +
    scale_y_log10()
  
  # Combine FeaturePlot and Violin plot using patchwork
  p1_p2_qc_counts <- p1_qc_counts | p2_qc_counts
  p1_p2_qc_counts <- p1_p2_qc_counts + patchwork::plot_annotation(
    title = paste0("Summed raw counts (", qc_counts, ", log10 scale)")
  )
  
  # Scatter plot: nCount vs nFeature using viridis for infinite colors
  m_nCount_nFeature <- paste0(c("nCount_", "nFeature_"), param$assay_raw)
  p3_qc_counts <- ggplot(seurat_obj[[]][, c(m_nCount_nFeature, "seurat_clusters")],
                         aes_string(x = m_nCount_nFeature[1], y = m_nCount_nFeature[2], colour = "seurat_clusters")) +
    geom_point() +
    scale_colour_viridis_d() +
    facet_wrap(~seurat_clusters) +
    theme(legend.position = "none",
          panel.background = element_rect(fill = "grey90", colour = NA),
          plot.background = element_rect(fill = "white", colour = NA),
          plot.title = element_text(hjust = 0.5)) +
    ggtitle(paste0(sample," - QC Metrics by Cluster"))
  
  ## --- nFeature plots ---
  
  # Define QC feature for nFeature
  qc_feature <- paste0("nFeature_", param$assay_raw)
  
  # FeaturePlot for nFeature
  p1_qc_feature <- suppressMessages(
    Seurat::FeaturePlot(seurat_obj, features = qc_feature) +
      ggtitle(paste0("Feature plot ", sample)) +
      scale_colour_gradient(low = "lightgrey", high = param$col, trans = "log10") +
      theme(plot.title = element_text(hjust = 0.5))
  )
  p1_qc_feature <- LabelClusters(p1_qc_feature, id = "ident", box = FALSE)
  
  # Violin plot for nFeature
  p2_qc_feature <- ggplot(seurat_obj[[]], aes_string(x = "seurat_clusters", y = qc_feature, 
                                                     fill = "seurat_clusters", group = "seurat_clusters")) +
    geom_violin(scale = "width") +
    ggtitle(paste0("Violin plot (log10 scale) ", sample)) +
    xlab("Cluster") +
    theme(legend.position = "none") +
    scale_y_log10()
  
  # Combine FeaturePlot and Violin plot for nFeature
  p1_p2_qc_feature <- p1_qc_feature | p2_qc_feature
  p1_p2_qc_feature <- p1_p2_qc_feature + patchwork::plot_annotation(
    title = paste0("Summed raw counts (", qc_feature, ", log10 scale)")
  )
  
  # Scatter plot: nFeature vs nFeature using viridis for infinite colors
  m_nFeature_nFeature <- paste0(c("nFeature_", "nFeature_"), param$assay_raw)
  p3_qc_feature <- ggplot(seurat_obj[[]][, c(m_nFeature_nFeature, "seurat_clusters")],
                          aes_string(x = m_nFeature_nFeature[1], y = m_nFeature_nFeature[2], colour = "seurat_clusters")) +
    geom_point() +
    scale_colour_viridis_d() +
    facet_wrap(~seurat_clusters) +
    theme(legend.position = "none",
          panel.background = element_rect(fill = "grey90", colour = NA),
          plot.background = element_rect(fill = "white", colour = NA),
          plot.title = element_text(hjust = 0.5)) +
    ggtitle(paste0(sample," - QC Metrics by Cluster"))
  
  ## --- Assign plots with dynamic names ---
  
  # For nCount QC plots
  assign(paste0("pRGc", sample, "_nCount_feature_violin"), p1_p2_qc_counts, envir = .GlobalEnv)
  assign(paste0("pRGc", sample, "_nCount_scatter"), p3_qc_counts, envir = .GlobalEnv)
  
  # For nFeature QC plots
  assign(paste0("pRGc", sample, "_nFeature_feature_violin"), p1_p2_qc_feature, envir = .GlobalEnv)
  assign(paste0("pRGc", sample, "_nFeature_scatter"), p3_qc_feature, envir = .GlobalEnv)
  
  # Optionally, print the plots for each sample:
  print(p1_p2_qc_counts)
  print(p3_qc_counts)
  print(p1_p2_qc_feature)
  print(p3_qc_feature)
}
##########################################################################
##########################################################################
# ----------------------------
# 5. Compute number of cells per condition
# ----------------------------
seurat_objects <- list(
  RGc8 = seurat_obj_RGc8,
  RGc9 = seurat_obj_RGc9,
  RGc10 = seurat_obj_RGc10,
  RGc11 = seurat_obj_RGc11,
  RGc12 = seurat_obj_RGc12,
  RGc13 = seurat_obj_RGc13)
cell_counts <- sapply(seurat_objects, function(obj) ncol(obj))
print(cell_counts)

seurat_objects_FloxFlox <- list(
  RGc8 = seurat_obj_RGc8,
  RGc9 = seurat_obj_RGc9,
  RGc10 = seurat_obj_RGc10)
cell_counts_FloxFlox <- sapply(seurat_objects_FloxFlox, function(obj) ncol(obj))
print(cell_counts_FloxFlox)

seurat_objects_CreFlox <- list(
  RGc11 = seurat_obj_RGc11,
  RGc12 = seurat_obj_RGc12,
  RGc13 = seurat_obj_RGc13)
cell_counts_CreFlox <- sapply(seurat_objects_CreFlox, function(obj) ncol(obj))
print(cell_counts_CreFlox)

# ----------------------------
# 6. Integration
# ----------------------------
# 1. Select integration features
features <- SelectIntegrationFeatures(object.list = seurat_objects, nfeatures = 3000)
# 2. Prepare objects for SCT integration
seurat_objects <- PrepSCTIntegration(object.list = seurat_objects, anchor.features = features)
# 3. Find integration anchors (using SCT normalization)
anchors <- FindIntegrationAnchors(object.list = seurat_objects, normalization.method = "SCT", 
                                  anchor.features = features, reduction = "rpca", k.anchor = 10)
# 4. Integrate the data (this returns one unified Seurat object)
seurat_integrated <- IntegrateData(anchorset = anchors, normalization.method = "SCT")
# 5. Run downstream analyses on the integrated object
seurat_integrated <- RunPCA(seurat_integrated, verbose = FALSE)
seurat_integrated <- FindNeighbors(seurat_integrated, dims = 1:20)
seurat_integrated <- FindClusters(seurat_integrated, resolution = seq(0.1, 1, 0.1), algorithm = 4)
seurat_integrated <- RunUMAP(seurat_integrated, dims = 1:20, verbose = FALSE)

# Generate plots
p6.1 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'Condition', cols = c('red','blue'))
print(p6.1)
p6.2 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'Sample', cols = c('red','green','blue','purple','yellow','cyan'))
print(p6.2)
grid.arrange(p6.1,p6.2,nrow=1,ncol=2)
pResolution_01 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.1', label = T)
print(pResolution_01)
pResolution_02 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.2', label = T)
print(pResolution_02)
pResolution_03 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.3', label = T)
print(pResolution_03)
pResolution_04 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.4', label = T)
print(pResolution_04)
pResolution_05 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.5', label = T)
print(pResolution_05)
pResolution_06 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.6', label = T)
print(pResolution_06)
pResolution_07 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.7', label = T)
print(pResolution_07)
pResolution_08 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.8', label = T)
print(pResolution_08)
pResolution_09 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.9', label = T)
print(pResolution_09)
pResolution_1 <- DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.1', label = T)
print(pResolution_1)

DimPlot(seurat_integrated, reduction = 'umap', group.by = c("integrated_snn_res.0.1","integrated_snn_res.0.2","integrated_snn_res.0.3"), label = T)
DimPlot(seurat_integrated, reduction = 'umap', group.by = 'integrated_snn_res.0.1', label = T, split.by = "Condition")

# Generate silhouette scores
silhouette_scores01 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.1), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores01)
silhouette_scores02 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.2), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores02)
silhouette_scores03 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.3), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores03)
silhouette_scores04 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.4), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores04)
silhouette_scores05 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.5), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores05)
silhouette_scores06 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.6), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores06)
silhouette_scores07 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.7), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores07)
silhouette_scores08 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.8), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores08)
silhouette_scores09 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.0.9), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores09)
silhouette_scores1 <- silhouette(as.numeric(seurat_integrated$integrated_snn_res.1), dist(seurat_integrated@reductions$umap@cell.embeddings))
summary(silhouette_scores1)

# Write here which one you choose and why: choose 0.1 because there are 9 clusters, enough to differentiate the cels we want.
seurat_integrated$seurat_clusters <- seurat_integrated$integrated_snn_res.0.1
Idents(seurat_integrated) <- "integrated_snn_res.0.1"
DimPlot(seurat_integrated, reduction = 'umap', group.by = c("Sample", "Condition", "seurat_clusters"), label = F)
DimPlot(seurat_integrated, reduction = 'umap', group.by = c("seurat_clusters"), split.by = "Condition", label = F)

# ----------------------------
# 7.1 Manual annotation - Using SCT assay
# ----------------------------
seurat_integrated_DEG_RNA <- seurat_integrated
seurat_integrated_DEG_RNA <- PrepSCTFindMarkers(seurat_integrated_DEG_RNA)
DefaultAssay(seurat_integrated_DEG_RNA) <- "SCT"
#seurat_integrated_DEG_RNA <- JoinLayers(seurat_integrated_DEG_RNA, assay = "RNA", layers = "counts")
#seurat_integrated_DEG_RNA <- JoinLayers(seurat_integrated_DEG_RNA, assay = "RNA", layers = "data")
markers_RNA <- FindAllMarkers(seurat_integrated_DEG_RNA, only.pos = TRUE, test.use = "wilcox", min.pct=0.1, logfc.threshold=1)

# ---- DUPLICATED OBJECT TO CONTAIN THE GENEID, INSTEAD OF ENSMBL ID
# Copy Seurat object
seurat_integrated_DEG_RNA_GeneID <- seurat_integrated_DEG_RNA
# Part 1: Build Gene Mapping
# Extract gene names from the counts slot
counts_gene_names_RNA <- rownames(GetAssayData(seurat_integrated_DEG_RNA_GeneID, assay = "SCT", layer = "counts"))
# Remove Ensembl version numbers (everything after the dot)
cleaned_counts_genes_RNA <- gsub("\\..*", "", counts_gene_names_RNA)
# Connect to Ensembl
ensembl <- useEnsembl(biomart = "genes", dataset = "mmusculus_gene_ensembl", version = 113, host = "https://www.ensembl.org")
# Retrieve gene symbols for Ensembl IDs
gene_map_RNA <- getBM(attributes = c("ensembl_gene_id", "external_gene_name"), 
                      filters = "ensembl_gene_id",
                      values = unique(cleaned_counts_genes_RNA),
                      mart = ensembl)
# Create a mapping dictionary (Ensembl ID -> gene symbol)
gene_dict_RNA <- setNames(gene_map_RNA$external_gene_name, gene_map_RNA$ensembl_gene_id)
# Handle missing gene names: Keep Ensembl ID if no symbol is found
gene_dict_RNA[is.na(gene_dict_RNA) | gene_dict_RNA == ""] <- names(gene_dict_RNA)[is.na(gene_dict_RNA) | gene_dict_RNA == ""]
# Part 2: Function to Update Gene Names
update_gene_names_RNA <- function(original_names, mapping) {
  # Remove version numbers from Ensembl IDs
  cleaned_names <- gsub("\\..*", "", original_names)
  # Map Ensembl IDs to gene symbols
  updated_names <- mapping[cleaned_names]
  # If mapping fails, keep original Ensembl ID
  missing_idx <- which(is.na(updated_names) | updated_names == "")
  if (length(missing_idx) > 0) {
    updated_names[missing_idx] <- original_names[missing_idx]
  }
  # Ensure unique gene names to avoid conflicts
  updated_names <- make.unique(updated_names)
  return(updated_names)
}
# Part 3: Update Row Names in RNA Assay Slots
# Get matrices from Seurat object
counts_matrix_RNA <- GetAssayData(seurat_integrated_DEG_RNA_GeneID, assay = "SCT", slot = "counts")
data_matrix_RNA <- GetAssayData(seurat_integrated_DEG_RNA_GeneID, assay = "SCT", slot = "data")
# Update row names for counts and data matrices
rownames(counts_matrix_RNA) <- update_gene_names_RNA(rownames(counts_matrix_RNA), gene_dict_RNA)
rownames(data_matrix_RNA) <- update_gene_names_RNA(rownames(data_matrix_RNA), gene_dict_RNA)
# Part 4: Rebuild the SCT Assay with Updated Gene Names
# 1. Get the SCT assay
sct_assay <- seurat_integrated_DEG_RNA_GeneID[["SCT"]]
# 2. Extract original Ensembl rownames
original_names <- rownames(sct_assay@counts)
# 3. Update row names using mapping function
updated_names <- update_gene_names_RNA(original_names, gene_dict_RNA)
# 4. Apply updated row names to counts and data slots
rownames(sct_assay@counts) <- updated_names
rownames(sct_assay@data) <- updated_names
# 5. Update only the matching genes in scale.data (subset of genes)
common_genes <- intersect(rownames(sct_assay@scale.data), original_names)
rownames(sct_assay@scale.data) <- updated_names[match(common_genes, original_names)]
# 6. Update meta.features
sct_assay@meta.features <- sct_assay@meta.features[original_names, , drop = FALSE]
rownames(sct_assay@meta.features) <- updated_names
# 7. Reassign the modified SCT assay back to the Seurat object
seurat_integrated_DEG_RNA_GeneID[["SCT"]] <- sct_assay
# Part 5: Verification
# Check dimensions (should match before/after)
print(dim(seurat_integrated_DEG_RNA_GeneID[["SCT"]]@counts))
print(dim(seurat_integrated_DEG_RNA_GeneID[["SCT"]]@data))
print(dim(seurat_integrated_DEG_RNA_GeneID[["SCT"]]@scale.data))
# Verify that gene names are now gene symbols instead of Ensembl IDs
print(head(rownames(GetAssayData(seurat_integrated_DEG_RNA_GeneID, assay = "SCT", slot = "scale.data"))))

# ----------------------------------------------------------------------------------------
# See the selected clusters
DimPlot(seurat_integrated_DEG_RNA_GeneID, reduction = 'umap', group.by = 'seurat_clusters', label = T)


###### Map markers_RNA from Ensmbl to GeneID
# 1) Remove the version from the Ensembl IDs
markers_RNA <- markers_RNA %>%
  mutate(ensembl_id_no_ver = str_replace(gene, "\\..*", ""))
# 2) Connect to Ensembl version 112 for mouse (Mus musculus)
ensembl <- useEnsembl(
  biomart = "genes",
  dataset = "mmusculus_gene_ensembl",
  version = 113
)
# 3) Retrieve gene symbols (external_gene_name) for the unique Ensembl IDs
gene_map <- getBM(
  attributes = c("ensembl_gene_id", "external_gene_name"), 
  filters    = "ensembl_gene_id",
  values     = unique(markers_RNA$ensembl_id_no_ver),
  mart       = ensembl
)
# 4) Join the gene symbols back to your markers_RNA table
markers_RNA <- markers_RNA %>%
  left_join(gene_map, by = c("ensembl_id_no_ver" = "ensembl_gene_id"))

# 5) Create a final column that uses the external gene name when available,
#    otherwise falls back to the original Ensembl ID with version (from the 'gene' column)
markers_RNA <- markers_RNA %>%
  mutate(final_gene_name = ifelse(is.na(external_gene_name) | external_gene_name == "",
                                  gene,
                                  external_gene_name))
# 6) (Optional) Replace the 'gene' column with your new names, or keep them both
markers_RNA$gene <- markers_RNA$final_gene_name
# 7) Check your final table
head(markers_RNA)
#write_csv(markers_RNA, file = "markers_RNA.csv")

#---------- Get top 10 genes from each cluster (https://cbiit.webex.com/recordingservice/sites/cbiit/recording/591b33d9e39a4b88ad4b3db123698915/playback)
top10PerCluster <- NULL  # Initialize an empty object to hold the results
for(i in 0:12){
  # Subset markers for cluster i
  clusterMarkers <- markers_RNA[markers_RNA$cluster == i, ]
  # Remove duplicate gene entries based on the 'gene' column
  clusterMarkers_unique <- clusterMarkers[!duplicated(clusterMarkers$gene), ]
  # Get top 10 unique markers for this cluster
  top10 <- head(clusterMarkers_unique, 10)
  # Append to the overall results
  top10PerCluster <- rbind(top10PerCluster, top10)
}
top10PerCluster

# ----------------------------
# 8. Isolating dendritic cell clusters - new approach
# ----------------------------
# 1. Subset the DCs from the integrated object
seurat_DCs_v2 <- subset(seurat_integrated_DEG_RNA_GeneID, 
                    subset = ACT_markers_RNA %in% c("cDC2", "cDC1"))
table(Idents(seurat_DCs_v2))
# 2. Re-run the Seurat workflow on the subset
seurat_DCs_v2 <- SCTransform(seurat_DCs_v2, 
                             vars.to.regress = "percent.mt",
                             verbose = TRUE)
seurat_DCs_v2 <- RunPCA(seurat_DCs_v2, assay = "SCT", verbose = FALSE)
ElbowPlot(seurat_DCs_v2)
# 3. Identify neighbors and clusters using the top 12 principal components and a resolution of (?)
seurat_DCs_v2 <- FindNeighbors(seurat_DCs_v2, dims = 1:12)
seurat_DCs_v2 <- FindClusters(seurat_DCs_v2, resolution = seq(0.1, 1, 0.1), algorithm = 4)
# (Optional) Run UMAP for visualization
seurat_DCs_v2 <- RunUMAP(seurat_DCs_v2, dims = 1:12)
DimPlot(seurat_DCs_v2, reduction = 'umap', group.by = 'SCT_snn_res.0.1', label = T, split.by = "Condition")
silhouette <- silhouette(as.numeric(seurat_DCs_v2$SCT_snn_res.0.1), dist(seurat_DCs_v2@reductions$umap@cell.embeddings))
summary(silhouette)

seurat_DCs_v2$seurat_clusters_DC <- seurat_DCs_v2$SCT_snn_res.0.1
Idents(seurat_DCs_v2) <- "SCT_snn_res.0.1"
Idents(seurat_DCs_v2)
DimPlot(seurat_DCs_v2, reduction = 'umap', group.by = c("Sample", "Condition", "seurat_clusters_DC"), label = F)
DimPlot(seurat_DCs_v2, reduction = 'umap', group.by = c("seurat_clusters_DC"), split.by = "Condition", label = F)


#-------- Map Ensmbl to GeneID on seurat_DCs_v2
# Part 1: Build Gene Mapping
# Extract gene names from the counts slot
counts_gene_names_RNA_DCs <- rownames(GetAssayData(seurat_DCs_v2, assay = "SCT", layer = "counts"))
# Remove Ensembl version numbers (everything after the dot)
cleaned_counts_genes_RNA_DCs <- gsub("\\..*", "", counts_gene_names_RNA_DCs)
# Retrieve gene symbols for Ensembl IDs
gene_map_RNA_DCs <- getBM(attributes = c("ensembl_gene_id", "external_gene_name"), 
                      filters = "ensembl_gene_id",
                      values = unique(cleaned_counts_genes_RNA_DCs),
                      mart = ensembl)
# Create a mapping dictionary (Ensembl ID -> gene symbol)
gene_dict_RNA_DCs <- setNames(gene_map_RNA_DCs$external_gene_name, gene_map_RNA_DCs$ensembl_gene_id)
# Handle missing gene names: Keep Ensembl ID if no symbol is found
gene_dict_RNA_DCs[is.na(gene_dict_RNA_DCs) | gene_dict_RNA_DCs == ""] <- names(gene_dict_RNA_DCs)[is.na(gene_dict_RNA_DCs) | gene_dict_RNA_DCs == ""]

# Part 2: Function to Update Gene Names
update_gene_names_RNA_DCs <- function(original_names, mapping) {
  # Remove version numbers from Ensembl IDs
  cleaned_names <- gsub("\\..*", "", original_names)
  # Map Ensembl IDs to gene symbols
  updated_names <- mapping[cleaned_names]
  # If mapping fails, keep original Ensembl ID
  missing_idx <- which(is.na(updated_names) | updated_names == "")
  if (length(missing_idx) > 0) {
    updated_names[missing_idx] <- original_names[missing_idx]
  }
  # Ensure unique gene names to avoid conflicts
  updated_names <- make.unique(updated_names)
  return(updated_names)
}

# Part 3: Update Row Names in RNA Assay Slots
# Get matrices from Seurat object
counts_matrix_RNA_DCs <- GetAssayData(seurat_DCs_v2, assay = "SCT", slot = "counts")
data_matrix_RNA_DCs <- GetAssayData(seurat_DCs_v2, assay = "SCT", slot = "data")
# Update row names for counts and data matrices
rownames(counts_matrix_RNA_DCs) <- update_gene_names_RNA_DCs(rownames(counts_matrix_RNA_DCs), gene_dict_RNA_DCs)
rownames(data_matrix_RNA_DCs) <- update_gene_names_RNA_DCs(rownames(data_matrix_RNA_DCs), gene_dict_RNA_DCs)

# Part 4: Rebuild the SCT Assay with Updated Gene Names
# 1. Get the SCT assay
sct_assay_DCs <- seurat_DCs_v2[["SCT"]]
# 2. Extract original Ensembl rownames
original_names_DCs <- rownames(sct_assay_DCs@counts)
# 3. Update row names using mapping function
updated_names_DCs <- update_gene_names_RNA_DCs(original_names_DCs, gene_dict_RNA_DCs)
# 4. Apply updated row names to counts and data slots
rownames(sct_assay_DCs@counts) <- updated_names_DCs
rownames(sct_assay_DCs@data) <- updated_names_DCs
# 5. Update only the matching genes in scale.data (subset of genes)
common_genes_DCs <- intersect(rownames(sct_assay_DCs@scale.data), original_names_DCs)
rownames(sct_assay_DCs@scale.data) <- updated_names_DCs[match(common_genes_DCs, original_names_DCs)]
# 6. Update meta.features
sct_assay_DCs@meta.features <- sct_assay_DCs@meta.features[original_names_DCs, , drop = FALSE]
rownames(sct_assay_DCs@meta.features) <- updated_names_DCs
# **7. Update var.features**
sct_assay_DCs@var.features <- update_gene_names_RNA_DCs(sct_assay_DCs@var.features, gene_dict_RNA_DCs)
# 8. Reassign the modified SCT assay back to the Seurat object
seurat_DCs_v2[["SCT"]] <- sct_assay_DCs

# Part 5: Verification
# Check dimensions (should match before/after)
print(dim(seurat_DCs_v2[["SCT"]]@counts))
print(dim(seurat_DCs_v2[["SCT"]]@data))
print(dim(seurat_DCs_v2[["SCT"]]@scale.data))
# Verify that gene names are now gene symbols instead of Ensembl IDs
print(head(rownames(GetAssayData(sct_assay_DCs, assay = "SCT", slot = "scale.data"))))
print(head(rownames(GetAssayData(seurat_DCs_v2, slot = "counts"))))


#-------- Identify markers for each DC cluster
DC_markers <- FindAllMarkers(seurat_DCs_v2, 
                             only.pos = TRUE, 
                             min.pct = 0.01,
                             test.use = "wilcox",
                             logfc.threshold = 0.01)
#---------- 

#---------- Get top 10 genes from each cluster
top10PerCluster_DC = matrix(ncol=7) # dim(DC_markers)
colnames(top10PerCluster_DC) = colnames(DC_markers)
for (i in 0:3){
  top10PerCluster_DC = rbind(top10PerCluster_DC, head(DC_markers[which(DC_markers$cluster==i),], 10))
}
top10PerCluster_DC=top10PerCluster_DC[-1,]
top10PerCluster_DC
#---------- 

#--- Mapping to DCclusters
dc_map <- c(
  "1"  = "cDC2",
  "2"  = "cDC1",
  "3"  = "migDC"
)
# Get the cluster assignments as characters from the meta.data
clusters_RNA_DCs <- as.character(seurat_DCs_v2$seurat_clusters_DC)
# Map the cluster assignments to the new cell type names using dc_map
dc_cluster_names <- dc_map[clusters_RNA_DCs]
# Remove names from the vector (ensuring no name mismatch)
dc_cluster_names <- unname(dc_cluster_names)
# Add the new metadata column using AddMetaData
seurat_DCs_v2 <- AddMetaData(object = seurat_DCs_v2,metadata = dc_cluster_names, col.name = "dc_cluster_names")
#Graph
DimPlot(seurat_DCs_v2, group.by = 'dc_cluster_names', reduction = "umap", label = TRUE, label.size = 4)
DimPlot(seurat_DCs_v2, group.by = 'dc_cluster_names', reduction = "umap", label = TRUE, label.size = 4, split.by = "Condition")
table(seurat_DCs_v2$dc_cluster_names, seurat_DCs_v2$Condition)

# Fisher test
# cDC1
table_cDC1 <- matrix(c(32, 277, 90, 567), nrow = 2, byrow = TRUE)
fisher.test(table_cDC1)

# cDC2
table_cDC2 <- matrix(c(265, 44, 505, 152), nrow = 2, byrow = TRUE)
fisher.test(table_cDC2)

# migDC
table_migDC <- matrix(c(12, 297, 62, 595), nrow = 2, byrow = TRUE)
fisher.test(table_migDC)

#---------- Heatmap for top10PerCluster_DC
# Retrieve expression data from the SCT assay (gene symbols are already the rownames)
expr_data_DC <- GetAssayData(seurat_DCs_v2, assay = "SCT", slot = "data")
# Use your list of heatmap genes (assumed to be gene symbols)
gene_list_heatmap_DC <- top10PerCluster_DC$gene
# Check that all genes in your list are present in the assay data
present_genes <- gene_list_heatmap_DC[gene_list_heatmap_DC %in% rownames(expr_data_DC)]
if(length(present_genes) < length(gene_list_heatmap_DC)) {
  warning("Some genes in gene_list_heatmap_DC are not found in the SCT assay and will be excluded.")
}
# Run AverageExpression using the gene symbols
avg_expr_DC <- AverageExpression(seurat_DCs_v2, 
                                 assays = "SCT", 
                                 features = present_genes, 
                                 slot = "data")$SCT
# Order the rows according to your original gene list (for only the genes that are present)
avg_expr_ordered_DC <- avg_expr_DC[present_genes, , drop = FALSE]

DC_markers_colNames <- dc_map[sub("g", "", colnames(avg_expr_ordered_DC))]
colnames(avg_expr_ordered_DC) <- DC_markers_colNames

# Finally, plot the heatmap
pheatmap(avg_expr_ordered_DC, 
         scale = "row", 
         cluster_rows = FALSE, 
         cluster_cols = FALSE,
         angle_col = 45,
         border_color = "black",
         fontsize_row = 8,
         fontsize_col = 8,
         cellwidth = 14,
         main = "Expression of Selected Markers Across Clusters")


# ----------------------------
# 9.1 Isolating T cell clusters - new approach
# ----------------------------
# 1. Subset the DCs from the integrated object
seurat_Tcells <- subset(seurat_integrated_DEG_RNA_GeneID, 
                        subset = ACT_markers_RNA %in% c("T cells"))
table(Idents(seurat_Tcells))
# 2. Re-run the Seurat workflow on the subset
seurat_Tcells <- SCTransform(seurat_Tcells, 
                             vars.to.regress = "percent.mt",
                             verbose = TRUE)
seurat_Tcells <- RunPCA(seurat_Tcells, assay = "SCT", verbose = FALSE)
ElbowPlot(seurat_Tcells)
# 3. Identify neighbors and clusters using the top 12 principal components and a resolution of (?)
seurat_Tcells <- FindNeighbors(seurat_Tcells, dims = 1:12)
seurat_Tcells <- FindClusters(seurat_Tcells, resolution = seq(0.1, 1, 0.1), algorithm = 4)
# (Optional) Run UMAP for visualization
seurat_Tcells <- RunUMAP(seurat_Tcells, dims = 1:12)
DimPlot(seurat_Tcells, reduction = 'umap', group.by = 'SCT_snn_res.0.8', label = T)
DimPlot(seurat_Tcells, reduction = 'umap', group.by = 'SCT_snn_res.0.8', label = T, split.by = "Condition")
silhouetteTcell <- silhouette(as.numeric(seurat_Tcells$SCT_snn_res.0.1), dist(seurat_Tcells@reductions$umap@cell.embeddings))
summary(silhouetteTcell)
silhouetteTcell <- silhouette(as.numeric(seurat_Tcells$SCT_snn_res.0.2), dist(seurat_Tcells@reductions$umap@cell.embeddings))
summary(silhouetteTcell)
silhouetteTcell <- silhouette(as.numeric(seurat_Tcells$SCT_snn_res.0.3), dist(seurat_Tcells@reductions$umap@cell.embeddings))
summary(silhouetteTcell)
silhouetteTcell <- silhouette(as.numeric(seurat_Tcells$SCT_snn_res.0.4), dist(seurat_Tcells@reductions$umap@cell.embeddings))
summary(silhouetteTcell)
silhouetteTcell <- silhouette(as.numeric(seurat_Tcells$SCT_snn_res.0.5), dist(seurat_Tcells@reductions$umap@cell.embeddings))
summary(silhouetteTcell)
silhouetteTcell <- silhouette(as.numeric(seurat_Tcells$SCT_snn_res.0.6), dist(seurat_Tcells@reductions$umap@cell.embeddings))
summary(silhouetteTcell)
silhouetteTcell <- silhouette(as.numeric(seurat_Tcells$SCT_snn_res.0.7), dist(seurat_Tcells@reductions$umap@cell.embeddings))
summary(silhouetteTcell)
silhouetteTcell <- silhouette(as.numeric(seurat_Tcells$SCT_snn_res.0.8), dist(seurat_Tcells@reductions$umap@cell.embeddings))
summary(silhouetteTcell)

seurat_Tcells$seurat_Tcells <- seurat_Tcells$SCT_snn_res.0.8
Idents(seurat_Tcells) <- "SCT_snn_res.0.8"
Idents(seurat_Tcells)
DimPlot(seurat_Tcells, reduction = 'umap', group.by = c("Sample", "Condition", "seurat_Tcells"), label = F)
DimPlot(seurat_Tcells, reduction = 'umap', group.by = c("seurat_Tcells"), split.by = "Condition", label = F)
DimPlot(seurat_Tcells, reduction = 'umap', group.by = c("seurat_Tcells"), label = F)



#-------- Map Ensmbl to GeneID on seurat_DCs_v2
# Part 1: Build Gene Mapping
# Extract gene names from the counts slot
counts_gene_names_RNA_Tcells <- rownames(GetAssayData(seurat_Tcells, assay = "SCT", layer = "counts"))
# Remove Ensembl version numbers (everything after the dot)
cleaned_counts_gene_names_RNA_Tcells <- gsub("\\..*", "", counts_gene_names_RNA_Tcells)
# Retrieve gene symbols for Ensembl IDs
gene_map_RNA_Tcells <- getBM(attributes = c("ensembl_gene_id", "external_gene_name"), 
                          filters = "ensembl_gene_id",
                          values = unique(cleaned_counts_gene_names_RNA_Tcells),
                          mart = ensembl)
# Create a mapping dictionary (Ensembl ID -> gene symbol)
gene_dict_RNA_Tcells <- setNames(gene_map_RNA_Tcells$external_gene_name, gene_map_RNA_Tcells$ensembl_gene_id)
# Handle missing gene names: Keep Ensembl ID if no symbol is found
gene_dict_RNA_Tcells[is.na(gene_dict_RNA_Tcells) | gene_dict_RNA_Tcells == ""] <- names(gene_dict_RNA_Tcells)[is.na(gene_dict_RNA_Tcells) | gene_dict_RNA_Tcells == ""]

# Part 2: Function to Update Gene Names
update_gene_names_RNA_Tcells <- function(original_names, mapping) {
  # Remove version numbers from Ensembl IDs
  cleaned_names <- gsub("\\..*", "", original_names)
  # Map Ensembl IDs to gene symbols
  updated_names <- mapping[cleaned_names]
  # If mapping fails, keep original Ensembl ID
  missing_idx <- which(is.na(updated_names) | updated_names == "")
  if (length(missing_idx) > 0) {
    updated_names[missing_idx] <- original_names[missing_idx]
  }
  # Ensure unique gene names to avoid conflicts
  updated_names <- make.unique(updated_names)
  return(updated_names)
}

# Part 3: Update Row Names in RNA Assay Slots
# Get matrices from Seurat object
counts_matrix_RNA_Tcells <- GetAssayData(seurat_Tcells, assay = "SCT", slot = "counts")
data_matrix_RNA_Tcells <- GetAssayData(seurat_Tcells, assay = "SCT", slot = "data")
# Update row names for counts and data matrices
rownames(counts_matrix_RNA_Tcells) <- update_gene_names_RNA_Tcells(rownames(counts_matrix_RNA_Tcells), gene_dict_RNA_Tcells)
rownames(data_matrix_RNA_Tcells) <- update_gene_names_RNA_Tcells(rownames(data_matrix_RNA_Tcells), gene_dict_RNA_Tcells)

# Part 4: Rebuild the SCT Assay with Updated Gene Names
# 1. Get the SCT assay
sct_assay_Tcells <- seurat_Tcells[["SCT"]]
# 2. Extract original Ensembl rownames
original_names_Tcells <- rownames(sct_assay_Tcells@counts)
# 3. Update row names using mapping function
updated_names_Tcells <- update_gene_names_RNA_Tcells(original_names_Tcells, gene_dict_RNA_Tcells)
# 4. Apply updated row names to counts and data slots
rownames(sct_assay_Tcells@counts) <- updated_names_Tcells
rownames(sct_assay_Tcells@data) <- updated_names_Tcells
# 5. Update only the matching genes in scale.data (subset of genes)
common_genes_Tcells <- intersect(rownames(sct_assay_Tcells@scale.data), original_names_Tcells)
rownames(sct_assay_Tcells@scale.data) <- updated_names_Tcells[match(common_genes_Tcells, original_names_Tcells)]
# 6. Update meta.features
sct_assay_Tcells@meta.features <- sct_assay_Tcells@meta.features[original_names_Tcells, , drop = FALSE]
rownames(sct_assay_Tcells@meta.features) <- updated_names_Tcells
# **7. Update var.features**
sct_assay_Tcells@var.features <- update_gene_names_RNA_Tcells(sct_assay_Tcells@var.features, gene_dict_RNA_Tcells)
# 8. Reassign the modified SCT assay back to the Seurat object
seurat_Tcells[["SCT"]] <- sct_assay_Tcells

# Part 5: Verification
# Check dimensions (should match before/after)
print(dim(seurat_Tcells[["SCT"]]@counts))
print(dim(seurat_Tcells[["SCT"]]@data))
print(dim(seurat_Tcells[["SCT"]]@scale.data))
# Verify that gene names are now gene symbols instead of Ensembl IDs
print(head(rownames(GetAssayData(sct_assay_Tcells, assay = "SCT", slot = "scale.data"))))
print(head(rownames(GetAssayData(seurat_Tcells, slot = "counts"))))


#-------- Identify markers for each DC cluster
Tcells_markers <- FindAllMarkers(seurat_Tcells, 
                             only.pos = TRUE, 
                             min.pct = 0.01,
                             test.use = "wilcox",
                             logfc.threshold = 0.01)
#---------- Get top 10 genes from each cluster
top10PerCluster_Tcells = matrix(ncol=7) # dim(Tcells_markers)
colnames(top10PerCluster_Tcells) = colnames(Tcells_markers)
for (i in 1:7){
  top10PerCluster_Tcells = rbind(top10PerCluster_Tcells, head(Tcells_markers[which(Tcells_markers$cluster==i),], 10))
}
top10PerCluster_Tcells=top10PerCluster_Tcells[-1,]
top10PerCluster_Tcells
#---------- 

# ----------------------------
# 9.2 Isolating T cell clusters - TILPRED
# ----------------------------
# 1. Load the reference map for mouse tumor-infiltrating T cells (by default)
ref <- load.reference.map()
# For a small dataset, run the projection in "direct" mode.
# This applies the PCA and UMAP rotations directly (without batch-effect correction).
# Optionally, if you want to disable the default TILPRED filtering (removal of non-T cells),
# you can set filter.cells = FALSE (default is TRUE).
projected_Tcells_direct <- make.projection(query = seurat_Tcells, 
                                           ref = ref, 
                                           direct = TRUE) # runs in direct mode
# Visualize the projected query cells on the reference UMAP
plot.projection(projected_Tcells_direct,
                ref = ref)

# (Optional) If you wish to predict cell states with a nearest-neighbor approach:
predicted_states <- cellstate.predict(projected_Tcells_direct, 
                                      ref = ref, 
                                      k = 20)  # k can be tuned based on your dataset

# You can then visualize the predicted cell states on UMAP:
DimPlot(predicted_states, group.by = c("functional.cluster"), label = TRUE) +
  ggtitle("T Cell States (Nearest Neighbor Prediction)")

custom_palette <- c(
  "CD8_Tex"            = "#edbe2a",
  "CD8_NaiveLike"      = "#00b6eb",
  "Treg"               = "#e812dd",
  "Th1"                = "#87f6a5",
  "CD4_NaiveLike"      = "#e5e4e2",
  "CD8_EffectorMemory" = "#63bb17"
)

DimPlot(predicted_states, 
        group.by = "functional.cluster", 
        label = TRUE, 
        cols = custom_palette) +
  ggtitle("T Cell States (Nearest Neighbor Prediction)")

DimPlot(predicted_states, 
        group.by = "functional.cluster", 
        label = TRUE,
        #split.by = "Condition",
        cols = custom_palette) +
  ggtitle("T Cell States (Nearest Neighbor Prediction)")


#-------- Identify markers for each DC cluster
Idents(predicted_states) <- predicted_states$functional.cluster

Tcells_markers_TILPRED <- FindAllMarkers(predicted_states, 
                                 only.pos = TRUE, 
                                 min.pct = 0.01,
                                 test.use = "wilcox",
                                 logfc.threshold = 0.01)

unique_clusters_TILPRED  <- unique(Tcells_markers_TILPRED$cluster)
#---------- Get top 10 genes from each cluster
top10PerCluster_Tcells_TILPRED <- NULL  # Start clean

for (cluster_id in unique_clusters_TILPRED) {
  cluster_markers <- Tcells_markers_TILPRED[Tcells_markers_TILPRED$cluster == cluster_id, ]
  cluster_top10 <- head(cluster_markers[!duplicated(cluster_markers$gene), ], 10)
  top10PerCluster_Tcells_TILPRED <- rbind(top10PerCluster_Tcells_TILPRED, cluster_top10)
}
#---------- 




# ----------------------------
# Figures for paper
# ----------------------------
#
#---------- Initial graphs
#
DimPlot(seurat_integrated, reduction = 'umap', group.by = "Sample", label = FALSE, pt.size = 0.2,
        cols = c(
          "RGc8" = "#37C8C1",  
          "RGc9" = "#3786C8",  
          "RGc10" = "#37C879", 
          "RGc11" = "#9618E7", 
          "RGc12" = "#E718D1", 
          "RGc13" = "#2E18E7"))  

DimPlot(seurat_integrated, reduction = 'umap', group.by = c("Condition"), label = F, pt.size = 0.2,
        cols = c(
          "CreFlox" = "#A00404",
          "FloxFlox" = "#057a92"
        ))

DimPlot(seurat_integrated, reduction = 'umap', group.by = "seurat_clusters", label = FALSE, pt.size = 0.2,
         cols = c(
           "1" = "#e6f598",  
           "2" = "#abdda4",  
           "3" = "#48c373", 
           "4" = "#d53e4f", 
           "5" = "#db662f", 
           "6" = "#ffcb99",
           "7" = "#3288bd",
           "8" = "#fee08b",
           "9" = "#e2e2a5"
           ))  

#
#---------- Myeloid vs lymphoid UMAPs
#
# Assign colors: Myeloid = distinct colors, Lymphoid = grey
color_palette_myeloid_all <- c(
  "Monocytes" = "#e6f598",  # Orange
  "Macrophages" = "#abdda4",  # Blue
  "cDC1" = "#3288bd",  # Green
  "cDC2" = "#48c373",  # Yellow
  "Mast cells" = "#e2e2a5",  # Pink
  "T cells" = "grey30",
  "NK cells" = "grey30",
  "B cells" = "grey30",
  "pDC" = "#fee08b"
)
# Generate the UMAP plot
umap_plot <- DimPlot(seurat_integrated_DEG_RNA_GeneID, reduction = "umap", group.by = "ACT_markers_RNA", pt.size = 0.2) +
  scale_color_manual(values = color_palette_myeloid_all) +
  theme_classic() +
  theme(
    panel.grid = element_blank(),  # Removes grid lines
    panel.background = element_blank()  # Ensures a clean background
  ) +
  ggtitle("UMAP Plot with Myeloid Cells Colored")
# Display the plot
print(umap_plot)

#-
# Assign colors: Myeloid = distinct colors, Lymphoid = grey
color_palette_cDCs <- c(
  "Monocytes" = "grey30",
  "Macrophages" = "grey30",
  "cDC1" = "#3288bd",
  "cDC2" = "#48c373",
  "Mast cells" = "grey30",
  "T cells" = "grey30",
  "NK cells" = "grey30",
  "B cells" = "grey30",
  "pDC" = "grey30"
)
# Generate the UMAP plot
umap_plot <- DimPlot(seurat_integrated_DEG_RNA_GeneID, reduction = "umap", group.by = "ACT_markers_RNA", pt.size = 0.2) +
  scale_color_manual(values = color_palette_cDCs) +
  theme_classic() +
  theme(
    panel.grid = element_blank(),  # Removes grid lines
    panel.background = element_blank()  # Ensures a clean background
  ) +
  ggtitle("UMAP Plot with cDCs Cells Colored")
# Display the plot
print(umap_plot)

#-
color_palette_lymphoid_all <- c(
  "Monocytes" = "grey30",  # Orange
  "Macrophages" = "grey30",  # Blue
  "cDC1" = "grey30",  # Green
  "cDC2" = "grey30",  # Yellow
  "Mast cells" = "grey30",  # Pink
  "T cells" = "#d53e4f",
  "NK cells" = "#db662f",
  "B cells" = "#ffcb99",
  "pDC" = "grey30"
)
# Generate the UMAP plot
umap_plot <- DimPlot(seurat_integrated_DEG_RNA_GeneID, reduction = "umap", group.by = "ACT_markers_RNA", pt.size = 0.2) +
  scale_color_manual(values = color_palette_lymphoid_all) +
  theme_classic() +
  theme(
    panel.grid = element_blank(),  # Removes grid lines
    panel.background = element_blank()  # Ensures a clean background
  ) +
  ggtitle("UMAP Plot with Myeloid Cells Colored")
# Display the plot
print(umap_plot)
#-
color_palette_Tcells <- c(
  "Monocytes" = "grey30",  # Orange
  "Macrophages" = "grey30",  # Blue
  "cDC1" = "grey30",  # Green
  "cDC2" = "grey30",  # Yellow
  "Mast cells" = "grey30",  # Pink
  "T cells" = "#d62d20",
  "NK cells" = "grey30",
  "B cells" = "grey30",
  "pDC" = "grey30"
)
# Generate the UMAP plot
umap_plot <- DimPlot(seurat_integrated_DEG_RNA_GeneID, reduction = "umap", group.by = "ACT_markers_RNA", pt.size = 0.2) +
  scale_color_manual(values = color_palette_Tcells) +
  theme_classic() +
  theme(
    panel.grid = element_blank(),  # Removes grid lines
    panel.background = element_blank()  # Ensures a clean background
  ) +
  ggtitle("UMAP Plot with Myeloid Cells Colored")
# Display the plot
print(umap_plot)


seurat_integrated_DEG_RNA_GeneID <- RenameIdents(seurat_integrated_DEG_RNA_GeneID, act_map)

# Classical heatmap
DoHeatmap(seurat_integrated_DEG_RNA_GeneID, 
          features = top10PerCluster$gene, 
          slot = "scale.data", 
          group.colors = c(
            "Monocytes" = "#e6f598",
            "Macrophages" = "#abdda4",
            "cDC2" = "#48c373",
            "T cells" = "#d53e4f",
            "NK cells" = "#db662f",
            "B cells" = "#ffcb99",
            "cDC1" = "#3288bd",
            "pDC" = "#fee08b",
            "Mast cells" = "#e2e2a5"
          )) + 
  theme(legend.text = element_text(size = 7, color = "black"),
        axis.text.y = element_text(size = 7, color = "black"))


#
#---------- Feature plots with the expression of main genes per cluster
#
# CD45
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ptprc",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# DCs
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Itgax",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3, alpha = 0.7) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Flt3",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3, alpha = 0.7) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# cDC1
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Xcr1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Clec9a",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Irf8",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Batf3",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd24a",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Itgae",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ifi205",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# cDC2
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Itgam",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd209a",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd14",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "H2-DMb2",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "H2-Eb1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ifi30",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Sirpa",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))


# migDC
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Fscn1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ccr7",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ccl22",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# CD8_Tex
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd8a",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ifngr1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Pdcd1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Klrg1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ctla4",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Gzmb",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Lag3",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Havcr2",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# CD8_EffectorMemory
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd8a",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ifngr1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# CD8_NaiveLike
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd8a",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ifngr1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ccr7",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# CD4_NaiveLike
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd4",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ccr7",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ifngr1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# Bcell
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd19",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd79a",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# Macrophages
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Adgre1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Mrc1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# Monocytes
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd14",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ifitm6",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Plcb1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# NK cell
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Klrb1c",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ncr1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))

# T cell
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd3d",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd8a",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ifngr1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Pdcd1",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Ctla4",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Gzmb",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))
FeaturePlot(seurat_integrated_DEG_RNA_GeneID, features = "Cd4",reduction = "umap", pt.size = 0.2,min.cutoff = 0, max.cutoff = 3) +
  scale_color_gradientn(colours = c("#c9c9c9", "#7d4ceb", "#1000fe"),limits = c(0, 3),breaks = seq(0, 3, 1),guide = guide_colorbar(title = "Expression", barwidth = 0.5, barheight = 5))



#----
color_palette_ACT_markers_RNA <- c(
  "Monocytes" = "#e6f598",  # Orange
  "Macrophages" = "#abdda4",  # Blue
  "cDC1" = "#3288bd",  # Green
  "cDC2" = "#48c373",  # Yellow
  "Mast cells" = "#e2e2a5",  # Pink
  "T cells" = "#d53e4f",
  "NK cells" = "#db662f",
  "B cells" = "#ffcb99",
  "pDC" = "#fee08b"
)

# Define your gene list
marker_list_cDC1 <- c("Xcr1", "Clec9a", "Ptprc", "Irf8", "Batf3", "Cd24a", "Itgae", "Ifi205", "Itgax")
for (gene in marker_list_cDC1) {
  Vln_graph <- VlnPlot(seurat_integrated_DEG_RNA_GeneID,
               features = gene,
               pt.size = 0,
               group.by = "ACT_markers_RNA",
               cols = color_palette_ACT_markers_RNA) +
    labs(title = gene, y = "Expression") +
    theme_classic2() +
    theme(legend.position = "none")
  
  print(Vln_graph)
}

marker_list_cDC2 <- c("Itgam","Cd209a","H2-DMb2","H2-Eb1","Ifi30", "Sirpa")
for (gene in marker_list_cDC2) {
  Vln_graph <- VlnPlot(seurat_integrated_DEG_RNA_GeneID,
                       features = gene,
                       pt.size = 0,
                       group.by = "ACT_markers_RNA",
                       cols = color_palette_ACT_markers_RNA) +
    labs(title = gene, y = "Expression") +
    theme_classic2() +
    theme(legend.position = "none")
  
  print(Vln_graph)
}

marker_list_Tcell <- c("Cd8a","Cd3d","Gzmb","Cd4","Pdcd1","Ctla4")
for (gene in marker_list_Tcell) {
  Vln_graph <- VlnPlot(seurat_integrated_DEG_RNA_GeneID,
                       features = gene,
                       pt.size = 0,
                       group.by = "ACT_markers_RNA",
                       cols = color_palette_ACT_markers_RNA) +
    labs(title = gene, y = "Expression") +
    theme_classic2() +
    theme(legend.position = "none")
  
  print(Vln_graph)
}

marker_list_NKcell <- c("Klrb1c","Ncr1")
for (gene in marker_list_NKcell) {
  Vln_graph <- VlnPlot(seurat_integrated_DEG_RNA_GeneID,
                       features = gene,
                       pt.size = 0,
                       group.by = "ACT_markers_RNA",
                       cols = color_palette_ACT_markers_RNA) +
    labs(title = gene, y = "Expression") +
    theme_classic2() +
    theme(legend.position = "none")
  
  print(Vln_graph)
}

for (gene in marker_list_cDC1) {
  expr_df <- FetchData(seurat_integrated_DEG_RNA_GeneID,
                       vars = c(gene, "ACT_markers_RNA"))
  expr_df <- expr_df[expr_df[[gene]] > 0, ]
  Ridge_graph <- ggplot(expr_df, aes(x = .data[[gene]], y = ACT_markers_RNA, fill = ACT_markers_RNA)) +
    scale_fill_manual(values = color_palette_ACT_markers_RNA) +
    geom_density_ridges(scale = 1.5, alpha = 0.8, rel_min_height = 0.01, 
                        #size = 0.3, # line thickness
                        show.legend = FALSE, 
                        color = "black") +  # adds the line on the wave AND the baseline
    labs(title = gene, x = "Expression", y = "Cell Type") +
    theme_minimal() +
    theme(
      panel.grid.major.x = element_blank(),  # remove vertical major grid lines
      panel.grid.minor.x = element_blank(),  # remove vertical minor grid lines
      panel.grid.major.y = element_line(color = "grey80"),  # keep horizontal lines
      panel.grid.minor.y = element_blank(),  # optional: remove minor horizontal lines
      legend.position = "none")
  print(Ridge_graph)
}

for (gene in marker_list_cDC1) {
  expr_df <- FetchData(seurat_integrated_DEG_RNA_GeneID,
                       vars = c(gene, "ACT_markers_RNA"))
  expr_df <- expr_df[expr_df[[gene]] > 0, ]
  Ridge_graph <- ggplot(expr_df, aes(x = .data[[gene]], y = ACT_markers_RNA, fill = ACT_markers_RNA)) +
    scale_fill_manual(values = color_palette_ACT_markers_RNA) +
    geom_density_ridges(scale = 1.5, alpha = 0.8, rel_min_height = 0.01, 
                        #size = 0.3, # line thickness
                        show.legend = FALSE, 
                        color = "black") +  # adds the line on the wave AND the baseline
    labs(title = gene, x = "Expression", y = "Cell Type") +
    theme_minimal() +
    theme(
      panel.grid.major.x = element_blank(),  # remove vertical major grid lines
      panel.grid.minor.x = element_blank(),  # remove vertical minor grid lines
      panel.grid.major.y = element_line(color = "grey80"),  # keep horizontal lines
      panel.grid.minor.y = element_blank(),  # optional: remove minor horizontal lines
      legend.position = "none")
  print(Ridge_graph)
}

for (gene in marker_list_Tcell) {
  expr_df <- FetchData(seurat_integrated_DEG_RNA_GeneID,
                       vars = c(gene, "ACT_markers_RNA"))
  expr_df <- expr_df[expr_df[[gene]] > 0, ]
  Ridge_graph <- ggplot(expr_df, aes(x = .data[[gene]], y = ACT_markers_RNA, fill = ACT_markers_RNA)) +
    scale_fill_manual(values = color_palette_ACT_markers_RNA) +
    geom_density_ridges(scale = 1.5, alpha = 0.8, rel_min_height = 0.01, 
                        #size = 0.3, # line thickness
                        show.legend = FALSE, 
                        color = "black") +  # adds the line on the wave AND the baseline
    labs(title = gene, x = "Expression", y = "Cell Type") +
    theme_minimal() +
    theme(
      panel.grid.major.x = element_blank(),  # remove vertical major grid lines
      panel.grid.minor.x = element_blank(),  # remove vertical minor grid lines
      panel.grid.major.y = element_line(color = "grey80"),  # keep horizontal lines
      panel.grid.minor.y = element_blank(),  # optional: remove minor horizontal lines
      legend.position = "none")
  print(Ridge_graph)
}
#
#---------- Heatmaps
# Heatmaps - Top10 all clusters
gene_list_heatmap <- top10PerCluster$gene
expr_data <- GetAssayData(seurat_integrated_DEG_RNA_GeneID, assay = "SCT", slot = "data") #[gene_list_heatmap, ]http://127.0.0.1:10825/graphics/plot_zoom_png?width=1856&height=861
missing_genes <- setdiff(gene_list_heatmap, rownames(GetAssayData(seurat_integrated_DEG_RNA_GeneID, assay = "SCT", slot = "data")))
print(missing_genes)

avg_expr <- AverageExpression(seurat_integrated_DEG_RNA_GeneID, assays = "SCT", features = gene_list_heatmap, layer = "data")$SCT
ordered_genes <- top10PerCluster$gene
avg_expr_ordered <- avg_expr[ordered_genes, ]

#ACT_markers_colNames <- act_map[sub("g", "", colnames(avg_expr_ordered))]
#colnames(avg_expr_ordered) <- ACT_markers_colNames
gene_labels_italic_All <- parse(text = paste0("italic('", rownames(avg_expr_ordered), "')"))

num_rows <- nrow(avg_expr_ordered)
pheatmap(avg_expr_ordered, 
         scale = "row", 
         cluster_rows = FALSE, 
         cluster_cols = FALSE,
         angle_col = 45,
         border_color = "grey30",
         fontsize_row = 7,
         fontsize_col = 10,
         cellwidth = 14,
         #cellheight = 8,
         height = num_rows * 0.6,  # tweak this factor if needed
         width = 6,
         labels_row = gene_labels_italic_All,
         main = "Expression of Selected Markers Across Clusters")

#---------- 
# Heatmaps - DC clusters
# Retrieve expression data from the SCT assay (gene symbols are already the rownames)
expr_data_DC <- GetAssayData(seurat_DCs_v2, assay = "SCT", slot = "data")
# Use your list of heatmap genes (assumed to be gene symbols)
gene_list_heatmap_DC <- top10PerCluster_DC$gene
# Check that all genes in your list are present in the assay data
present_genes <- gene_list_heatmap_DC[gene_list_heatmap_DC %in% rownames(expr_data_DC)]
if(length(present_genes) < length(gene_list_heatmap_DC)) {
  warning("Some genes in gene_list_heatmap_DC are not found in the SCT assay and will be excluded.")
}
# Run AverageExpression using the gene symbols
avg_expr_DC <- AverageExpression(seurat_DCs_v2, 
                                 assays = "SCT", 
                                 features = present_genes, 
                                 slot = "data")$SCT
# Order the rows according to your original gene list (for only the genes that are present)
avg_expr_ordered_DC <- avg_expr_DC[present_genes, , drop = FALSE]

DC_markers_colNames <- dc_map[sub("g", "", colnames(avg_expr_ordered_DC))]
colnames(avg_expr_ordered_DC) <- DC_markers_colNames

gene_labels_italic_DCs <- parse(text = paste0("italic('", rownames(avg_expr_ordered_DC), "')"))

# Finally, plot the heatmap
num_rows_DCs <- nrow(avg_expr_ordered_DC)
pheatmap(avg_expr_ordered_DC, 
         scale = "row", 
         cluster_rows = FALSE, 
         cluster_cols = FALSE,
         angle_col = 45,
         border_color = "grey30",
         fontsize_row = 8,
         fontsize_col = 10,
         cellwidth = 14,
         #cellheight = 8,
         height = num_rows * 0.6,  # tweak this factor if needed
         width = 6,
         labels_row = gene_labels_italic_DCs,
         main = "Expression of Selected Markers Across Clusters")

#---------- 
# Heatmaps - Tcell clusters
gene_list_heatmap_TILPRED <- unique(top10PerCluster_Tcells_TILPRED$gene)

expr_data_TILPRED <- GetAssayData(predicted_states, assay = "SCT", slot = "data")
present_genes_TILPRED <- gene_list_heatmap_TILPRED[gene_list_heatmap_TILPRED %in% rownames(expr_data_TILPRED)]

if (length(present_genes_TILPRED) < length(gene_list_heatmap_TILPRED)) {
  warning("Some genes from the top 10 list are not found in the SCT assay and will be excluded.")
}

avg_expr_TILPRED <- AverageExpression(predicted_states,
                                      assays = "SCT",
                                      features = present_genes_TILPRED,
                                      group.by = "functional.cluster",
                                      slot = "data")$SCT

desired_order <- c(
  "CD8-Tex",
  "CD8-NaiveLike",
  "Treg",
  "Th1",
  "CD4-NaiveLike",
  "CD8-EffectorMemory"
)

missing_cols <- setdiff(desired_order, colnames(avg_expr_TILPRED))
if (length(missing_cols) > 0) {
  stop("Missing clusters in matrix: ", paste(missing_cols, collapse = ", "))
}
# Reorder columns
avg_expr_ordered_TILPRED <- avg_expr_TILPRED[, desired_order]
gene_labels_italic_tcells <- parse(text = paste0("italic('", rownames(avg_expr_ordered_TILPRED), "')"))

num_rows_TILPRED <- nrow(avg_expr_ordered_TILPRED)
pheatmap(avg_expr_ordered_TILPRED, 
         scale = "row",
         cluster_rows = FALSE, 
         cluster_cols = FALSE,
         angle_col = 45,
         border_color = "grey30",
         fontsize_row = 8,
         fontsize_col = 10,
         cellwidth = 14,
         height = num_rows_TILPRED * 0.6,
         width = 6,
         labels_row = gene_labels_italic_tcells,
         main = "Top Marker Expression Across TILPRED T Cell States")


#### Figure S4 options
# UMAP colored by functional T cell subsets
DimPlot(predicted_states, 
        reduction = "umap", 
        group.by = "functional.cluster", 
        cols = custom_palette, 
        label = FALSE, label.size = 4) + 
  ggtitle("T cell subsets - UMAP")

# Tabulate proportions
tcell_prop <- predicted_states@meta.data %>%
  group_by(Condition, functional.cluster) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(Condition) %>%
  mutate(freq = n / sum(n))

# Plot stacked barplot
ggplot(tcell_prop, aes(x = Condition, y = freq, fill = functional.cluster)) +
  geom_bar(stat = "identity", position = "stack", width = 0.8, color = "black", size = 0.5) +
  scale_fill_manual(values = custom_palette) +
  scale_x_discrete(expand = c(0.2,0)) +
  scale_y_continuous(expand = c(0,0))+
  theme_minimal() +
  theme(panel.grid = element_blank(), 
        axis.line = element_line(size = 1, colour = "black", linetype=1),
        axis.ticks = element_line(size = 1, color="black"),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12))+
  labs(title = "Proportional composition of T cell subsets",
       y = "Fraction of cells", x = "")



#---------- DimPlot T cell focused - TILPRED - TOTAL CD4-CD8
# Put FloxFlox before CreFlox in the graph
predicted_states$Condition <- factor(predicted_states$Condition, levels = c("FloxFlox", "CreFlox"))

# You can then visualize the predicted cell states on UMAP:
custom_palette <- c(
  "CD8_Tex"            = "#c135d8",
  "CD8_NaiveLike"      = "#c135d8",
  "Treg"               = "#1bb1b1",
  "Th1"                = "#ffbc82",
  "CD4_NaiveLike"      = "#ffbc82",
  "CD8_EffectorMemory" = "#c135d8"
)

DimPlot(predicted_states, 
        group.by = "functional.cluster", 
        label = FALSE, 
        cols = custom_palette,
        pt.size = 2)

DimPlot(predicted_states, 
        group.by = "functional.cluster", 
        label = FALSE, 
        cols = custom_palette,
        pt.size = 2,
        split.by = "Condition",
        label.size = 5) +
  theme(panel.spacing = unit(2, "lines"))  # increase inter-panel spacing

### Plotting the functional.cluster by condition in INDIVIDUAL FILES! (THIS IMAGE IS FOR PAPER)
# suppose your Seurat object has a metadata column called "Condition"
# pull out the two condition levels
conds <- unique(predicted_states$Condition)

for (cond in conds) {
  # subset to one condition
  so_sub <- subset(predicted_states, Condition == cond)
  
  # build the plot (no split.by, and theme_classic() removes grid lines)
  p <- DimPlot(
    so_sub,
    group.by    = "functional.cluster",
    label       = FALSE,
    cols        = custom_palette,
    pt.size     = 2,
    label.size  = 5
  ) +
    ggtitle(cond) +
    theme_classic(base_size = 14)
  
  # print it to your R graphics device
  print(p)
}
###

#---------- DimPlot T cell focused - TILPRED
# You can then visualize the predicted cell states on UMAP:
custom_palette <- c(
  "CD8_Tex"            = "#c135d8",
  "CD8_NaiveLike"      = "#dc6f8e",
  "Treg"               = "#f59280",
  "Th1"                = "#fbdc9b",
  "CD4_NaiveLike"      = "#ffbc82",
  "CD8_EffectorMemory" = "#5c53a5"
)

DimPlot(predicted_states, 
        group.by = "functional.cluster", 
        label = FALSE, 
        cols = custom_palette,
        pt.size = 1.5)

DimPlot(predicted_states, 
        group.by = "functional.cluster", 
        label = FALSE, 
        cols = custom_palette,
        pt.size = 1.5,
        split.by = "Condition",
        label.size = 5) +
  theme(panel.spacing = unit(2, "lines"))  # increase inter-panel spacing


# Visualize the projected query cells on the reference UMAP
TILPRED_palette <- c(
  "CD8_Tex" = "#c135d8",
  "CD8_Tpex" = "#fcbd33",
  "CD8_EarlyActiv" = "#ab5b9e",
  "Tfh" = "#6a95e7",
  "CD8_NaiveLike" = "#dc6f8e",
  "Treg" = "#f59280",
  "Th1" = "#fbdc9b",
  "CD4_NaiveLike" = "#ffbc82",
  "CD8_EffectorMemory" = "#5c53a5"
)

plot.projection(projected_Tcells_direct,
                ref = ref,
                cols = TILPRED_palette)

#
#---------- DimPlot DC focused
# Put FloxFlox before CreFlox in the graph
seurat_DCs_v2$Condition <- factor(seurat_DCs_v2$Condition, levels = c("FloxFlox", "CreFlox"))

custom_palette_DCs <- c(
  "cDC1"            = "#3242BD",
  "cDC2"      = "#009c4b",
  "migDC"               = "#c5cdf7"
)

DimPlot(seurat_DCs_v2, 
        group.by = "dc_cluster_names", 
        label = FALSE, 
        cols = custom_palette_DCs,
        pt.size = 2)

DimPlot(seurat_DCs_v2, 
        group.by = "dc_cluster_names", 
        label = FALSE, 
        cols = custom_palette_DCs,
        pt.size = 2,
        split.by = "Condition")

### Plotting the functional.cluster by condition in INDIVIDUAL FILES! (THIS IMAGE IS FOR PAPER)
# suppose your Seurat object has a metadata column called "Condition"
# pull out the two condition levels
conds_DCs <- unique(seurat_DCs_v2$Condition)

for (cond in conds_DCs) {
  # subset to one condition
  so_sub_DC <- subset(seurat_DCs_v2, Condition == cond)
  
  # build the plot (no split.by, and theme_classic() removes grid lines)
  p <- DimPlot(
    so_sub_DC,
    group.by    = "dc_cluster_names",
    label       = FALSE,
    cols        = custom_palette_DCs,
    pt.size     = 2,
    label.size  = 5
  ) +
    ggtitle(cond) +
    theme_classic(base_size = 14)
  
  # print it to your R graphics device
  print(p)
}
###

#
#---------- Heatmaps para poucos DC genes
# Define gene sets
gene_sets <- list(
  "DC_maturation" = c("Cd40", "Cd80", "Cd86", "Relb", "Cd83"),
  "DC_regulatory" = c("Cd274","Pdcd1lg2","Cd200","Fas","Aldh1a2","Socs1","Socs2"),
  "DC_migration" = c("Ccr7","Myo1g","Cxcl16","Adam8","Icam1","Fscn1","Marcks","Marcksl1"),
  "DC_crossPres" = c("Tap1","Tap2","Ctss","Ctsd","Cybb","Rab27a",
                     "Sec22b","Sec61a1","Sec61b","Sec61g","Derl1","Vcp")
)

# Specify the desired cluster order
desired_cluster_order <- c("migDC", "cDC1", "cDC2")

# Get normalized SCT expression matrix
expr_data_DC <- GetAssayData(seurat_DCs_v2, assay = "SCT", slot = "data")

# Loop over gene sets and plot heatmaps
for (set_name in names(gene_sets)) {
  genes <- gene_sets[[set_name]]
  genes_available <- intersect(genes, rownames(expr_data_DC))
  expr_data_subset <- expr_data_DC[genes_available, ]
  
  # Compute cluster averages
  expr_means <- sapply(desired_cluster_order, function(cluster) {
    cells_in_cluster <- colnames(seurat_DCs_v2)[seurat_DCs_v2$dc_cluster_names == cluster]
    Matrix::rowMeans(expr_data_subset[, cells_in_cluster, drop = FALSE])
  })
  
  # Scale by gene (row)
  expr_means_scaled <- t(scale(t(expr_means)))
  
  # ------ TRUE ITALIC LABELS -------
  gene_labels_italic <- parse(text = paste0("italic('", rownames(expr_means_scaled), "')"))
  
  # Plot
  pheatmap(expr_means_scaled,
           cluster_rows = TRUE,
           cluster_cols = FALSE,
           labels_row = gene_labels_italic,
           main = paste("Set -", set_name),
           fontsize_row = 12,
           fontsize_col = 12,
           angle_col = 45,
           color = colorRampPalette(c("navy", "white", "firebrick3"))(100))
}


### DotPlots
for (set_name in names(gene_sets)) {
  genes <- gene_sets[[set_name]]
  
  p <- DotPlot(seurat_DCs_v2,
               features = genes,
               group.by = "dc_cluster_names") +
    scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = paste0("DotPlot - ", set_name),
         x = "Gene", y = "Cluster",
         color = "Avg Expression", size = "Pct Expressing")
  
  print(p)  # <--- This is what was missing!
}

## Only heatmap to cDC1 by condition
library(reshape2)
for (set_name in names(gene_sets)) {
  genes <- gene_sets[[set_name]]
  genes_available <- intersect(genes, rownames(expr_data_DC))
  expr_data_subset <- expr_data_DC[genes_available, ]
  
  # Filter only cDC1 across all conditions
  combined_groups <- unique(paste(seurat_DCs_v2$dc_cluster_names, seurat_DCs_v2$Condition, sep = "_"))
  combined_groups <- combined_groups[grepl("^cDC1_", combined_groups)]  # Keep only cDC1
  
  # Compute average expression per gene per group
  expr_means <- sapply(combined_groups, function(group) {
    parts <- strsplit(group, "_")[[1]]
    cluster <- parts[1]
    condition <- parts[2]
    cells_in_group <- colnames(seurat_DCs_v2)[
      seurat_DCs_v2$dc_cluster_names == cluster &
        seurat_DCs_v2$Condition == condition
    ]
    if (length(cells_in_group) > 0) {
      Matrix::rowMeans(expr_data_subset[, cells_in_group, drop = FALSE])
    } else {
      rep(NA, nrow(expr_data_subset))  # if no cells, return NA
    }
  })
  
  # Remove columns with only NA
  expr_means <- expr_means[, colSums(is.na(expr_means)) < nrow(expr_means), drop = FALSE]
  
  # Remove genes (rows) where all values are NA
  expr_means <- expr_means[rowSums(is.na(expr_means)) < ncol(expr_means), , drop = FALSE]
  
  # Scale safely: avoid NaNs when all values identical
  expr_means_scaled <- t(scale(t(expr_means)))
  #expr_means_scaled <- expr_means
  # After scaling, remove any genes with NaN
  expr_means_scaled <- expr_means_scaled[complete.cases(expr_means_scaled), ]
  
  # Row labels in italic
  gene_labels_italic <- parse(text = paste0("italic('", rownames(expr_means_scaled), "')"))
  
  # Plot
  p <- pheatmap(expr_means_scaled,
                cluster_rows = TRUE,
                cluster_cols = FALSE,
                labels_row = gene_labels_italic,
                main = paste("Set -", set_name, "| cDC1 only"),
                fontsize_row = 12,
                fontsize_col = 10,
                angle_col = 45,
                color = colorRampPalette(c("navy", "white", "firebrick3"))(100),
                breaks = seq(-2, 2, length.out = 100))
  
  print(p)  # Print each plot
  
  # Prepare tidy table
  expr_means_long <- melt(expr_means)
  colnames(expr_means_long) <- c("Gene", "Group", "Expression")
  
  # Initialize matrix for percent expressing
  percent_expressing <- sapply(combined_groups, function(group) {
    parts <- strsplit(group, "_")[[1]]
    cluster <- parts[1]
    condition <- parts[2]
    cells_in_group <- colnames(seurat_DCs_v2)[
      seurat_DCs_v2$dc_cluster_names == cluster &
        seurat_DCs_v2$Condition == condition
    ]
    if (length(cells_in_group) > 0) {
      expr_subset <- expr_data_subset[, cells_in_group, drop = FALSE]
      # Percent of cells with expression > 0
      rowMeans(expr_subset > 0) * 100
    } else {
      rep(NA, nrow(expr_data_subset))
    }
  })
  
  # Clean NAs
  percent_expressing <- percent_expressing[, colSums(is.na(percent_expressing)) < nrow(percent_expressing), drop = FALSE]
  
  # Now combine into a long dataframe
  expr_means_long <- melt(expr_means)
  colnames(expr_means_long) <- c("Gene", "Group", "Expression")
  
  percent_long <- melt(percent_expressing)
  colnames(percent_long) <- c("Gene", "Group", "PercentExpressing")
  
  # Merge expression and percent
  plot_data <- merge(expr_means_long, percent_long, by = c("Gene", "Group"))
  
  # Now plot
  p_dot <- ggplot(plot_data, aes(x = Group, y = Gene)) +
    geom_point(aes(size = PercentExpressing, color = Expression)) +
    scale_color_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
    theme_minimal() +
    labs(title = paste("DotPlot -", set_name, "| cDC1 only"),
         x = "Cluster-Condition", y = "Gene",
         color = "Avg Expression", size = "% Expressing") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          strip.text = element_text(face = "italic"))
  
  print(p_dot)
  
  write.csv(expr_means, file = paste0("expr_means_", set_name, "_cDC1_only.csv"))
  
}